import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter


def analyze_data(news_list):
    """Segment news titles, count word frequencies, and render a word cloud.

    Parameters
    ----------
    news_list : list[dict]
        Each item must contain a 'title' key holding the headline text.

    Returns
    -------
    list[tuple[str, int]]
        Up to the 20 most common multi-character words and their counts.
        Empty when no words survive filtering (e.g. empty input).

    Side effects
    ------------
    Writes the rendered word cloud to 'wordcloud.png' — skipped when the
    frequency table is empty, because WordCloud raises ValueError on an
    empty frequency dict.
    """
    # Concatenate all titles into one corpus for segmentation.
    all_text = ' '.join(news['title'] for news in news_list)

    # Segment with jieba; keep only tokens longer than one character
    # to drop single-character noise words.
    word_counts = Counter(
        word for word in jieba.cut(all_text) if len(word) > 1
    )

    # Guard: generate_from_frequencies rejects an empty dict, so only
    # render the image when we actually counted something.
    if word_counts:
        wc = WordCloud(
            font_path='simhei.ttf',
            width=800,
            height=600,
            background_color='white'
        ).generate_from_frequencies(word_counts)

        plt.figure(figsize=(10, 8))
        plt.imshow(wc)
        plt.axis('off')
        plt.savefig('wordcloud.png')
        plt.close()

    # Top 20 most frequent words (fewer if the corpus is small).
    return word_counts.most_common(20)